#clean up
rm(list=ls())

#load libraries
library(lattice)
library(xtable)
library(foreign)
library(maps)
library(geoR)
library(rgdal)
library(car)
library(maptools)
library(textclean)

#options
options(scipen=8)

#load data
combined<-read.dta('cces08reformattedFine.dta')
senate2011<-read.dta("senate2011.dta")
house2011<-read.dta("house2011.dta")
nominate<-read.dta("commonSpace112.dta") #2011-2012 term of Congress

###Cleaning Shor & McCarty Data###
table(senate2011$st)
length(table(senate2011$st))#No KY, MN, or MO. No NE (upper chamber).
table(house2011$st)
length(table(house2011$st))#No KY or MA. NE as lower chamber.

#Reclassify district numbers in Shor & McCarty from factors to characters
senate2011$sdistrict2011<-as.character(senate2011$sdistrict2011)
house2011$hdistrict2011<-as.character(house2011$hdistrict2011)

#make Nebraska's unicameral an upper chamber
ne.uni<-subset(house2011,subset=st=="NE")
house2011<-subset(house2011,subset=st!="NE")
senate2011<-rbind(senate2011,ne.uni)
senate2011$sdistrict2011[senate2011$st=='NE']<-senate2011$hdistrict2011[senate2011$st=='NE']

#Recodes for Shor & McCarty data to link with kriged points using Census district codes.
senate2011$sdistrict2011[senate2011$st=='VT']<-recode(senate2011$sdistrict2011[senate2011$st=='VT'],'"ADDISON"="ADD";"BENNINGTON"="BEN";"CALEDONIA"="CAL";"CHITTENDEN"="CHI";"ESSEX-ORLEANS"="E-O";"FRANKLIN"="FRA";"GRAND ISLE"="CGI";"LAMOILLE"="LAM";"ORANGE"="ORA";"RUTLAND"="RUT";"WASHINGTON"="WAS";"WINDHAM"="WDM";"WINDSOR"="WSR"')
house2011$hdistrict2011[house2011$st=='WA']<-substr(house2011$hdistrict2011[house2011$st=='WA'],1,3)
house2011$hdistrict2011[house2011$st=='VT']<-recode(house2011$hdistrict2011[house2011$st=='VT'], '"ADDISON-1"="A-1";"ADDISON-2"="A-2";"ADDISON-3"="A-3";"ADDISON-4"="A-4";"ADDISON-5"="A-5";"ADDISON-RUTLAND-1"="AR1";"BENNINGTON-1"="B-1";"BENNINGTON-2-1"="B21";"BENNINGTON-2-2"="B22";"BENNINGTON-3"="B-3";"BENNINGTON-4"="B-4";"BENNINGTON-5"="B-5";"BENNINGTON-RUTLAND-1"="BR1";"CALEDONIA-1"="CA1";"CALEDONIA-2"="CA2";"CALEDONIA-3"="CA3";"CALEDONIA-4"="CA4";"CALEDONIA-WASHINGTON-1"="CAW";"CHITTENDEN-1-1"="C11";"CHITTENDEN-1-2"="C12";"CHITTENDEN-2"="C-2";"CHITTENDEN-3-1"="C31";"CHITTENDEN-3-10"="C35";"CHITTENDEN-3-2"="C32";"CHITTENDEN-3-3"="C33";"CHITTENDEN-3-4"="C34";"CHITTENDEN-3-5"="C35";"CHITTENDEN-3-6"="C36";"CHITTENDEN-3-7"="C37";"CHITTENDEN-3-8"="C38";"CHITTENDEN-3-9"="C39";"CHITTENDEN-4"="C-4";"CHITTENDEN-5-1"="C51";"CHITTENDEN-5-2"="C52";"CHITTENDEN-6-1"="C61";"CHITTENDEN-6-2"="C62";"CHITTENDEN-6-3"="C63";"CHITTENDEN-7-1"="C71";"CHITTENDEN-7-2"="C72";"CHITTENDEN-8"="C-8";"CHITTENDEN-9"="C-9";"ESSEX-CALEDONIA"="EC1";"ESSEX-CALEDONIA-ORLEANS"="EC2";"FRANKLIN-1"="F-1";"FRANKLIN-2"="F-2";"FRANKLIN-3"="F-3";"FRANKLIN-4"="F-4";"FRANKLIN-5"="F-5";"FRANKLIN-6"="F-6";"GRAND ISLE-CHITTENDEN-1-1"="GC1";"LAMOILLE-1"="L-1";"LAMOILLE-2"="L-2";"LAMOILLE-3"="L-3";"LAMOILLE-4"="L-4";"LAMOILLE-WASHINGTON-1"="LW1";"ORANGE-1"="OG1";"ORANGE-2"="OG2";"ORANGE-ADDISON-1"="OA1";"ORANGE-CALEDONIA-1"="OGC";"ORLEANS-1"="OL1";"ORLEANS-2"="OL2";"ORLEANS-CALEDONIA-1"="OLC";"ORLEANS-FRANKLIN-1"="OLF";"RUTLAND-1-1"="R11";"RUTLAND-1-2"="R12";"RUTLAND-2"="R-2";"RUTLAND-3"="R-3";"RUTLAND-4"="R-4";"RUTLAND-5-1"="R51";"RUTLAND-5-2"="R52";"RUTLAND-5-3"="R53";"RUTLAND-5-4"="R54";"RUTLAND-6"="R-6";"RUTLAND-7"="R-7";"RUTLAND-8"="R-8";"RUTLAND-WINDSOR-1"="RY1";"WASHINGTON-1"="W-1";"WASHINGTON-2"="W-2";"WASHINGTON-3-1"="W31";"WASHINGTON-3-2"="W32";"WASHINGTON-3-3"="W33";"WASHINGTON-4"="W-4";"WASHINGTON-5"="W-5";"WASHINGTON-6"="W-6";"WASHINGTON-7"="W-7";"WASHINGTON-CHITTENDEN-1"="WC1";"WINDHAM-1"="X-1";"WINDHAM-2"="X-2";"WINDHAM-3-1"="X31";"WINDHAM-3-2"="X32";"WINDHAM-3-3"="X33";"WINDHAM-4"="X-4";"WINDHAM-5"="X-5";"WINDHAM-6"="X-6";"WINDHAM-BENNINGTON-1"="XB1";"WINDHAM-BENNINGTON-WINDSOR-1"="XBY";"WINDSOR-1-1"="Y11";"WINDSOR-1-2"="Y12";"WINDSOR-2"="Y-2";"WINDSOR-3"="Y-3";"WINDSOR-4"="Y-4";"WINDSOR-5"="Y-5";"WINDSOR-6-1"="Y61";"WINDSOR-6-2"="Y62";"WINDSOR-ORANGE-1"="YO1";"WINDSOR-ORANGE-2"="YO2";"WINDSOR-RUTLAND-1"="YR1";"WINDSOR-RUTLAND-2"="YR2"')
house2011$hdistrict2011[house2011$st=='NH']<-recode(house2011$hdistrict2011[house2011$st=='NH'], '"BELKNAP 1"="101";"BELKNAP 2"="102";"BELKNAP 3"="103";"BELKNAP 4"="104";"BELKNAP 5"="105";"BELKNAP 6"="106";"CARROLL 1"="201";"CARROLL 2"="202";"CARROLL 3"="203";"CARROLL 4"="204";"CARROLL 5"="205";"CHESHIRE 1"="301";"CHESHIRE 2"="302";"CHESHIRE 3"="303";"CHESHIRE 4"="304";"CHESHIRE 5"="305";"CHESHIRE 6"="306";"CHESHIRE 7"="307";"COOS 1"="401";"COOS 2"="402";"COOS 3"="403";"COOS 4"="404";"GRAFTON 1"="501";"GRAFTON 10"="510";"GRAFTON 11"="511";"GRAFTON 2"="502";"GRAFTON 3"="503";"GRAFTON 4"="504";"GRAFTON 5"="505";"GRAFTON 6"="506";"GRAFTON 7"="507";"GRAFTON 8"="508";"GRAFTON 9"="509";"HILLSBOROUGH 1"="601";"HILLSBOROUGH 10"="610";"HILLSBOROUGH 11"="611";"HILLSBOROUGH 12"="612";"HILLSBOROUGH 13"="613";"HILLSBOROUGH 14"="614";"HILLSBOROUGH 15"="615";"HILLSBOROUGH 16"="616";"HILLSBOROUGH 17"="617";"HILLSBOROUGH 18"="618";"HILLSBOROUGH 19"="619";"HILLSBOROUGH 2"="602";"HILLSBOROUGH 20"="620";"HILLSBOROUGH 21"="621";"HILLSBOROUGH 22"="622";"HILLSBOROUGH 23"="623";"HILLSBOROUGH 24"="624";"HILLSBOROUGH 25"="625";"HILLSBOROUGH 26"="626";"HILLSBOROUGH 27"="627";"HILLSBOROUGH 3"="603";"HILLSBOROUGH 4"="604";"HILLSBOROUGH 5"="605";"HILLSBOROUGH 6"="606";"HILLSBOROUGH 7"="607";"HILLSBOROUGH 8"="608";"HILLSBOROUGH 9"="609";"MERRIMACK 1"="701";"MERRIMACK 10"="710";"MERRIMACK 11"="711";"MERRIMACK 12"="712";"MERRIMACK 13"="713";"MERRIMACK 2"="702";"MERRIMACK 3"="703";"MERRIMACK 4"="704";"MERRIMACK 5"="705";"MERRIMACK 6"="706";"MERRIMACK 7"="707";"MERRIMACK 8"="708";"MERRIMACK 9"="709";"ROCKINGHAM 014"="814";"ROCKINGHAM 1"="801";"ROCKINGHAM 10"="810";"ROCKINGHAM 11"="811";"ROCKINGHAM 12"="812";"ROCKINGHAM 13"="813";"ROCKINGHAM 14"="814";"ROCKINGHAM 15"="815";"ROCKINGHAM 16"="816";"ROCKINGHAM 17"="817";"ROCKINGHAM 18"="818";"ROCKINGHAM 2"="802";"ROCKINGHAM 3"="803";"ROCKINGHAM 4"="804";"ROCKINGHAM 5"="805";"ROCKINGHAM 6"="806";"ROCKINGHAM 7"="807";"ROCKINGHAM 8"="808";"ROCKINGHAM 9"="809";"STRAFFORD 1"="901";"STRAFFORD 2"="902";"STRAFFORD 3"="903";"STRAFFORD 4"="904";"STRAFFORD 5"="905";"STRAFFORD 6"="906";"STRAFFORD 7"="907";"SULLIVAN 1"="001";"SULLIVAN 2"="002";"SULLIVAN 3"="003";"SULLIVAN 4"="004";"SULLIVAN 5"="005"')

#Number of samples
N<-100

#### PROCESSING MCMC SAMPLES ####
first.sample<-read.csv('bootstrap/sample1.csv')
mcmc<-matrix(NA,nrow=N,ncol=ncol(first.sample))
colnames(mcmc)<-colnames(first.sample)
rm(first.sample)
for(i in 1:N){
	check<-file.exists(paste('bootstrap/sample',i,'.csv',sep=''))
	if(check){
		current.sample<-read.csv(paste('bootstrap/sample',i,'.csv',sep=''))
		mcmc[i,]<-apply(current.sample,2,mean)
		rm(current.sample)
	}else{
		print(paste('File sample',i,'.csv does not exist.',sep=''))
		}
	}

#compute tau squared alone
tausq.alone<-mcmc[,"tausq.rel"]*mcmc[,"sigmasq"]
mcmc<-cbind(mcmc,tausq.alone)

#Density Plots
png("tausq.png")
densityplot(mcmc[,"tausq.alone"],xlab=expression(tau^2))
dev.off()
png("tausqRel.png")
densityplot(mcmc[,"tausq.rel"],xlab=expression(tau^2/sigma^2))
dev.off()
png("phi.png")
densityplot(mcmc[,"phi"],xlab=expression(1/phi))
dev.off()
png("sigmasq.png")
densityplot(mcmc[,"sigmasq"],xlab=expression(sigma^2))
dev.off()

#check coefficient ordering, also used in residuals for variogram
combined.48<-subset(combined,subset=state!="AK" & state !="HI")
ideol.ols<-lm(ideology~age*educ+as.factor(race)*female+inc14+
	I(catholic+orthodox)+I(evangelical+mormon)+I(islam+jewish)+mainline+rural+ownership
	+as.factor(empstat)+eastings*northings+I(eastings^2)+I(northings^2),
	data=combined.48);summary(ideol.ols)
#length(ideol.ols$residuals)#21764
ideol.ols$coefficients
ideol.ols$coefficients[c(16:19,23)]
apply(mcmc[,c(16:19,23)],2,mean)

#rescale eastings and northings polynomial coefficients
mcmc[,"beta15"]<-mcmc[,"beta15"]*1000
mcmc[,"beta16"]<-mcmc[,"beta16"]*1000
mcmc[,"beta17"]<-mcmc[,"beta17"]*1000000
mcmc[,"beta18"]<-mcmc[,"beta18"]*1000000
mcmc[,"beta22"]<-mcmc[,"beta22"]*1000000

#Output in a LaTeX table
xtable(
cbind(
apply(mcmc,2,mean),
apply(mcmc,2,sd),
apply(mcmc,2,quantile,.05),
apply(mcmc,2,quantile,.95)
),
digits=4)

###plot empirical variogram###
#create variograms
ideol.robust<-variog(coords=cbind(combined.48$eastings,combined.48$northings), data=combined.48$ideology, estimator.type="modulus")
combined.48$ols.resid<-ideol.ols$residuals
resid.robust<-variog(coords=cbind(combined.48$eastings,combined.48$northings), data=combined.48$ols.resid, estimator.type="modulus")
gaussian.fit <- variofit(resid.robust, ini.cov.pars = c(100,3000), cov.model="gaussian", fix.nugget=FALSE, nugget=700)

#OLS output
#postscript('olsGaussian.eps',width=5,height=5)
#pdf('olsGaussian.pdf',width=5,height=5)
plot(ideol.robust, xlab="Distance in Kilometers", ylab="Semivariance",ylim=c(600,725))
par(new=T)
plot(resid.robust, xlab="", ylab="",axes=F,ylim=c(600,800),pch='+', col='blue')
lines(gaussian.fit, col='blue')
#dev.off()

#MCMC Output
#postscript('mcmcGaussian.eps',width=5,height=5)
#pdf('mcmcGaussian.pdf',width=5,height=5)
plot(ideol.robust, xlab="Distance in Kilometers", ylab="Semivariance",ylim=c(600,725))
par(new=T)
plot(resid.robust, xlab="", ylab="",axes=F,ylim=c(600,800),pch='+', col='blue')
lines.variomodel(cov.model="gaussian",cov.pars=c(87.0752,4860.5613),nug=610.5291,col='red',max.dist=6000)
#dev.off()

#### PROCESSING FORECASTS ####
for(i in 1:N){
	check<-file.exists(paste('bootstrap/forecast',i,'.csv',sep=''))
	if(check){
		assign(paste('f',i,sep=''),read.csv(paste('bootstrap/forecast',i,'.csv',sep='')))
	}else{
		print(paste('File forecast',i,'.csv does not exist.',sep=''))
		}
	}

#merge and drop component files
forecasts<-rbind(f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18,f19,f20,f21,f22,f23,f24,f25,f26,f27,f28,f29,f30,f31,f32,f33,f34,f35,f36,f37,f38,f39,f40,f41,f42,f43,f44,f45,f46,f47,f48,f49,f50,f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f62,f63,f64,f65,f66,f67,f68,f69,f70,f71,f72,f73,f74,f75,f76,f77,f78,f79,f80,f81,f82,f83,f84,f85,f86,f87,f88,f89,f90,f91,f92,f93,f94,f95,f96,f97,f98,f99,f100)
rm(f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18,f19,f20,f21,f22,f23,f24,f25,f26,f27,f28,f29,f30,f31,f32,f33,f34,f35,f36,f37,f38,f39,f40,f41,f42,f43,f44,f45,f46,f47,f48,f49,f50,f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f62,f63,f64,f65,f66,f67,f68,f69,f70,f71,f72,f73,f74,f75,f76,f77,f78,f79,f80,f81,f82,f83,f84,f85,f86,f87,f88,f89,f90,f91,f92,f93,f94,f95,f96,f97,f98,f99,f100)
forecasts<-subset(forecasts,subset=predictive.mean>=0 & predictive.mean<=100)

###obtain overall state averages from kriging###
stateKriging<-aggregate(x=forecasts$predictive.mean, by=list(STATEA=forecasts$STATEA), FUN=mean)
colnames(stateKriging)[2]<-'krige.state'
between<-aggregate(x=forecasts$predictive.mean, by=list(STATEA=forecasts$STATEA), FUN=var)
within<-aggregate(x=forecasts$predictive.variance, by=list(STATEA=forecasts$STATEA), FUN=mean)
no.kriges<-table(forecasts$STATEA)
stateKriging$krige.state.var<-within[,2]+((no.kriges+1)/no.kriges)*between[,2]

#merge with common space scores for Senate members
senateElec<-subset(nominate,subset=cd==0)
senateElec$STATEA<-recode(senateElec$state,'41=1;81=2;61=4;42=5;71=6;62=8;1=9;11=10;55=11;43=12;44=13;82=15;63=16;21=17;22=18;31=19;32=20;51=21;45=22;2=23;52=24;3=25;23=26;33=27;46=28;34=29;64=30;35=31;65=32;4=33;12=34;66=35;13=36;47=37;36=38;24=39;53=40;72=41;14=42;5=44;48=45;37=46;54=47;49=48;67=49;6=50;40=51;73=53;56=54;25=55;68=56')
senateElec$stateCD<-senateElec$STATEA*100+senateElec$cd
stateCombined<-merge(x=stateKriging,y=senateElec,by="STATEA")

#observe association with ideal points
#postscript('senate.eps',width=5,height=5)
#pdf('senate.pdf',width=5,height=5)
plot(y=stateCombined$dwnom1,x=stateCombined$krige.state,xlab="State Ideology (Kriging)", ylab="Legislator Ideology (1st Dim., Common Space)", main="U.S. Senate", type="n")
points(y=stateCombined$dwnom1[stateCombined$party==200],x=stateCombined$krige.state[stateCombined$party==200],pch="R",col="red")
points(y=stateCombined$dwnom1[stateCombined$party==100],x=stateCombined$krige.state[stateCombined$party==100],pch="D",col="blue")
abline(lm(stateCombined$dwnom1~stateCombined$krige.state),col='gray20')
#dev.off()
cor(stateCombined$dwnom1,stateCombined$krige.state)#.=0.6138887
summary(lm(stateCombined$dwnom1~stateCombined$krige.state))

#add in Obama's 2012 vote share, observe association
stateKriging$obama<-recode(stateKriging $STATEA,'1=38.78377147;2=42.68470952;4=45.38661978;5=37.84559465;6=61.87281116;8=52.74800715;9=58.77257748;10=59.44695492;11=92.5876492;12=50.44225214;13=46.0433509;15=71.7038485;16=33.57861316;17=58.57752339;18=44.7996254;19=52.95935193;20=38.88667325;21=38.45722761;22=41.25317439;23=57.85992139;24=63.32172579;25=61.78568075;26=54.80053207;27=53.94122646;28=44.1981008;29=45.22133325;30=42.96576814;31=38.87060973;32=53.40754216;33=52.83375198;34=58.98685114;35=55.29520465;36=64.30089453;37=48.9659651;38=39.88210486;39=51.51455254;40=33.22768026;41=56.27116718;42=52.731934;44=64.01674598;45=44.6917453;46=40.78150086;47=39.64892847;48=41.99210278;49=25.3738111;50=68.24725883;51=51.96737669;53=57.62829827;54=36.32570237;55=53.5163824;56=28.83936599')
stateCombined$obama<-recode(stateCombined $STATEA,'1=38.78377147;2=42.68470952;4=45.38661978;5=37.84559465;6=61.87281116;8=52.74800715;9=58.77257748;10=59.44695492;11=92.5876492;12=50.44225214;13=46.0433509;15=71.7038485;16=33.57861316;17=58.57752339;18=44.7996254;19=52.95935193;20=38.88667325;21=38.45722761;22=41.25317439;23=57.85992139;24=63.32172579;25=61.78568075;26=54.80053207;27=53.94122646;28=44.1981008;29=45.22133325;30=42.96576814;31=38.87060973;32=53.40754216;33=52.83375198;34=58.98685114;35=55.29520465;36=64.30089453;37=48.9659651;38=39.88210486;39=51.51455254;40=33.22768026;41=56.27116718;42=52.731934;44=64.01674598;45=44.6917453;46=40.78150086;47=39.64892847;48=41.99210278;49=25.3738111;50=68.24725883;51=51.96737669;53=57.62829827;54=36.32570237;55=53.5163824;56=28.83936599')
stateKriging$abbr<-recode(stateKriging$STATEA,"1='AL';2='AK';4='AZ';5='AR';6='CA';8='CO';9='CT';10='DE';11='DC';12='FL';13='GA';15='HI';16='ID';17='IL';18='IN';19='IA';20='KA';21='KY';22='LA';23='ME';24='MD';25='MA';26='MI';27='MN';28='MS';29='MO';30='MT';31='NE';32='NV';33='NH';34='NJ';35='NM';36='NY';37='NC';38='ND';39='OH';40='OK';41='OR';42='PA';44='RI';45='SC';46='SD';47='TN';48='TX';49='UT';50='VT';51='VA';53='WA';54='WV';55='WI';56='WY'")
#stateKriging<-read.csv('stateWholeKrigingFine.csv')
#postscript('obama.eps',width=5,height=5)
#pdf('obama.pdf',width=5,height=5)
plot(y= stateKriging$obama[-9],x= stateKriging$krige.state[-9],xlab="State Ideology (Kriging)", ylab="Obama Share of Two-Party Vote",  type="n", main="Presidential Vote")
text(stateKriging$abbr[-9],y= stateKriging$obama[-9],x= stateKriging$krige.state[-9])
abline(lm(stateKriging$obama[-9]~ stateKriging$krige.state[-9]),col='gray20')
#dev.off()
cor(stateKriging$obama[-9], stateKriging$krige.state[-9])#-0.7853699
summary(lm(stateKriging$obama[-9]~ stateKriging$krige.state[-9]))




###obtain congressional district averages from kriging###
forecasts$CDA[forecasts$CDA==0]<-1
forecasts$stateCong<-forecasts$STATEA*100+forecasts$CDA
congKriging<-aggregate(x=forecasts$predictive.mean, by=list(stateCD=forecasts$stateCong), FUN=mean)
colnames(congKriging)[2]<-'krige.cong'
between<-aggregate(x=forecasts$predictive.mean, by=list(stateCD=forecasts$stateCong), FUN=var)
within<-aggregate(x=forecasts$predictive.variance, by=list(stateCD=forecasts$stateCong), FUN=mean)
no.kriges<-table(forecasts$stateCong)
congKriging$krige.cong.var<-within[,2]+((no.kriges+1)/no.kriges)*between[,2]

#merge with common space scores for House members
houseElec<-subset(nominate,subset=cd!=0)
houseElec$STATEA<-recode(houseElec$state,'41=1;81=2;61=4;42=5;71=6;62=8;1=9;11=10;55=11;43=12;44=13;82=15;63=16;21=17;22=18;31=19;32=20;51=21;45=22;2=23;52=24;3=25;23=26;33=27;46=28;34=29;64=30;35=31;65=32;4=33;12=34;66=35;13=36;47=37;36=38;24=39;53=40;72=41;14=42;5=44;48=45;37=46;54=47;49=48;67=49;6=50;40=51;73=53;56=54;25=55;68=56')
houseElec$stateCD<-houseElec$STATEA*100+houseElec$cd
congCombined<-merge(x=congKriging,y=houseElec,by="stateCD")

#congCombined<-read.csv('congLegis.csv')
#observe association with ideal points
#postscript('congDist.eps',width=5,height=5)
#pdf('congDist.pdf',width=5,height=5)
plot(y=congCombined$dwnom1,x=congCombined$krige.cong,xlab="District Ideology (Kriging)", ylab="Legislator Ideology (1st Dim., Common Space)", main="U.S. House of Representatives", type="n")
points(y=congCombined$dwnom1[congCombined$party==200],x=congCombined$krige.cong[congCombined$party==200],pch="R",col="red")
points(y=congCombined$dwnom1[congCombined$party==100],x=congCombined$krige.cong[congCombined$party==100],pch="D",col="blue")
abline(lm(congCombined$dwnom1~congCombined$krige.cong),col='gray20')
#dev.off()
cor(congCombined$dwnom1,congCombined$krige.cong)#0.5811137
summary(lm(congCombined$dwnom1~congCombined$krige.cong))

#Add extra information 
hold<-subset(houseElec,select=c(stateCD,STATEA,statenm,cd))
congKriging<-merge(x=congKriging,y=hold,by='stateCD')



###obtain state lower chamber district averages from kriging###
#need to count how many observations by state and district (harder with "lettered" districts)
forecasts$SLDLA<-as.character(forecasts$SLDLA)
forecasts$lowerKluge<-paste(forecasts$STATEA,forecasts$SLDLA)
forecasts$constant<-1
lowerKlugeAgg.1<-aggregate(x=cbind(forecasts$STATEA,forecasts$SLDLA),by=list(lowerKluge=forecasts$lowerKluge), FUN=head,n=1)
colnames(lowerKlugeAgg.1)[2:3]<-c("STATEA","lower")
lowerKlugeAgg.2<-aggregate(x=forecasts$constant,by=list(lowerKluge=forecasts$lowerKluge), FUN=sum)
colnames(lowerKlugeAgg.2)[2]<-'no.kriges'
lowerKlugeAgg<-merge(x=lowerKlugeAgg.1,y=lowerKlugeAgg.2,by="lowerKluge")

#create averages and merge with counts
lowerKriging<-aggregate(x=forecasts$predictive.mean, by=list(STATEA=forecasts$STATEA, lower=forecasts$SLDLA), FUN=mean)
colnames(lowerKriging)[3]<-'krige.lower'
lowerKriging<-merge(x=lowerKriging,y=lowerKlugeAgg,by=c("STATEA","lower"))

#measure variation
between<-aggregate(x=forecasts$predictive.mean, by=list(STATEA=forecasts$STATEA, lower=forecasts$SLDLA), FUN=var)
within<-aggregate(x=forecasts$predictive.variance, by=list(STATEA=forecasts$STATEA, lower=forecasts$SLDLA), FUN=mean)
colnames(between)[3]<-"between"
colnames(within)[3]<-"within"
lowerKriging<-merge(x=lowerKriging,y=between,by=c("STATEA","lower"))
lowerKriging<-merge(x=lowerKriging,y=within,by=c("STATEA","lower"))
lowerKriging$krige.lower.var<-lowerKriging$within+((lowerKriging$no.kriges+1)/lowerKriging$no.kriges)*lowerKriging$between
lowerKriging$st<-as.character(recode(lowerKriging$STATEA,'1="AL";2="AK";4="AZ";5="AR";6="CA";8="CO";9="CT";10="DE";11="DC";12="FL";13="GA";15="HI";16="ID";17="IL";18="IN";19="IA";20="KS";21="KY";22="LA";23="ME";24="MD";25="MA";26="MI";27="MN";28="MS";29="MO";30="MT";31="NE";32="NV";33="NH";34="NJ";35="NM";36="NY";37="NC";38="ND";39="OH";40="OK";41="OR";42="PA";44="RI";45="SC";46="SD";47="TN";48="TX";49="UT";50="VT";51="VA";53="WA";54="WV";55="WI";56="WY"'))
lowerKriging<-subset(lowerKriging,subset=!st%in%c("DC","NE"),select=-c(no.kriges,between,within))

#merge-in legislator ideal points
lowerElec<-subset(house2011,select=c(name,party,st,st_id,np_score,hdistrict2011))
colnames(lowerElec)[6]<-"lower"
lowerCombined<-merge(x=lowerKriging,y=lowerElec,by=c("st","lower"),all.x=T) 
#table(lowerCombined$st[is.na(lowerCombined$np_score)]) #KY and MA missing.
#dim(lowerCombined)#5446

#observe association with ideal points
#postscript('lowerChambers.eps',width=5,height=5)
#pdf('lowerChambers.pdf',width=5,height=5)
plot(y=lowerCombined$np_score,x=lowerCombined$krige.lower,xlab="District Ideology (Kriging)", ylab="Legislator Ideology (Shor & McCarty)", main="State Legislatures: Lower Chambers", type="n")#
points(y=lowerCombined$np_score[lowerCombined$party=="R"],x=lowerCombined$krige.lower[lowerCombined$party=="R"],pch=".",col="red")
points(y=lowerCombined$np_score[lowerCombined$party=="D"],x=lowerCombined$krige.lower[lowerCombined$party=="D"],pch=".",col="blue")
abline(lm(lowerCombined$np_score~lowerCombined$krige.lower),col='gray20')
#dev.off()
cor(lowerCombined$np_score,lowerCombined$krige.lower,use="complete.obs")#0.4232543
summary(lm(lowerCombined$np_score~lowerCombined$krige.lower))



###obtain state upper chamber district averages from kriging###
#need to count how many observations by state and district (harder with "lettered" districts)
forecasts$SLDUA<-as.character(forecasts$SLDUA)
forecasts$upperKluge<-paste(forecasts$STATEA,forecasts$SLDUA)
forecasts$constant<-1
upperKlugeAgg.1<-aggregate(x=cbind(forecasts$STATEA,forecasts$SLDUA),by=list(upperKluge=forecasts$upperKluge), FUN=head,n=1)
colnames(upperKlugeAgg.1)[2:3]<-c("STATEA","upper")
upperKlugeAgg.2<-aggregate(x=forecasts$constant,by=list(upperKluge=forecasts$upperKluge), FUN=sum)
colnames(upperKlugeAgg.2)[2]<-'no.kriges'
upperKlugeAgg<-merge(x=upperKlugeAgg.1,y=upperKlugeAgg.2,by="upperKluge")

#create averages and merge with counts
upperKriging<-aggregate(x=forecasts$predictive.mean, by=list(STATEA=forecasts$STATEA, upper=forecasts$SLDUA), FUN=mean)
colnames(upperKriging)[3]<-'krige.upper'
upperKriging<-merge(x=upperKriging,y=upperKlugeAgg,by=c("STATEA","upper"))

#measure variation
between<-aggregate(x=forecasts$predictive.mean, by=list(STATEA=forecasts$STATEA, upper=forecasts$SLDUA), FUN=var)
within<-aggregate(x=forecasts$predictive.variance, by=list(STATEA=forecasts$STATEA, upper=forecasts$SLDUA), FUN=mean)
colnames(between)[3]<-"between"
colnames(within)[3]<-"within"
upperKriging<-merge(x=upperKriging,y=between,by=c("STATEA","upper"))
upperKriging<-merge(x=upperKriging,y=within,by=c("STATEA","upper"))
upperKriging$krige.upper.var<-upperKriging$within+((upperKriging$no.kriges+1)/upperKriging$no.kriges)*upperKriging$between
upperKriging$st<-as.character(recode(upperKriging$STATEA,'1="AL";2="AK";4="AZ";5="AR";6="CA";8="CO";9="CT";10="DE";11="DC";12="FL";13="GA";15="HI";16="ID";17="IL";18="IN";19="IA";20="KS";21="KY";22="LA";23="ME";24="MD";25="MA";26="MI";27="MN";28="MS";29="MO";30="MT";31="NE";32="NV";33="NH";34="NJ";35="NM";36="NY";37="NC";38="ND";39="OH";40="OK";41="OR";42="PA";44="RI";45="SC";46="SD";47="TN";48="TX";49="UT";50="VT";51="VA";53="WA";54="WV";55="WI";56="WY"'))
upperKriging<-subset(upperKriging,subset=!st%in%c("DC"),select=-c(no.kriges,between,within))

#merge-in legislator ideal points
upperElec<-subset(senate2011,select=c(name,party,st,st_id,np_score,sdistrict2011))
colnames(upperElec)[6]<-"upper"
upperCombined<-merge(x=upperKriging,y=upperElec,by=c("st","upper"),all.x=T)
#table(upperCombined$st[is.na(upperCombined$np_score)]) #KY, MA, MN, and MO missing.
#dim(upperCombined)#1989

#observe association with ideal points
#postscript('upperChambersAlt.eps',width=5,height=5)
#pdf('upperChambersAlt.pdf',width=5,height=5)
plot(y=upperCombined$np_score,x=upperCombined$krige.upper,xlab="District Ideology (Kriging)", ylab="Legislator Ideology (Shor & McCarty)", main="State Legislatures: Upper Chambers", type="n")
points(y=upperCombined$np_score[upperCombined$party=="R"],x=upperCombined$krige.upper[upperCombined$party=="R"],pch=".",col="red")
points(y=upperCombined$np_score[upperCombined$party=="D"],x=upperCombined$krige.upper[upperCombined$party=="D"],pch=".",col="blue")
abline(lm(upperCombined$np_score~upperCombined$krige.upper),col='gray20')
#dev.off()
cor(upperCombined$np_score,upperCombined$krige.upper,use="complete.obs")#0.4756057
summary(lm(upperCombined$np_score~upperCombined$krige.upper))




#write output data
write.csv(stateKriging, 'stateWholeKrigingFine.csv', row.names=F)
write.csv(stateCombined, 'stateWholeLegis.csv', row.names=F)
write.csv(congKriging, 'congKrigingFine.csv', row.names=F)
write.csv(congCombined, 'congLegis.csv', row.names=F)
write.csv(lowerKriging, 'stateLowerKrigingFine.csv', row.names=F)
write.csv(lowerCombined, 'stateLowerLegis.csv', row.names=F)
write.csv(upperKriging, 'stateUpperKrigingFine.csv', row.names=F)
write.csv(upperCombined, 'stateUpperLegis.csv', row.names=F)

#write as .rda files
lowerCombined$name<-replace_non_ascii(lowerCombined$name)
upperCombined$name<-replace_non_ascii(upperCombined$name)
save(stateCombined,file="stateCombined.rda",compress='xz')
save(congCombined,file="congCombined.rda",compress='xz')
save(lowerCombined,file="lowerCombined.rda",compress='xz')
save(upperCombined,file="upperCombined.rda",compress='xz')

#read output data, get correlations
#setwd("/Volumes/MONOGAN/gillAreal/ccesCode/MergedLegis")
#stateCombined <-read.csv('stateWholeLegis.csv')
#congCombined <-read.csv('congLegis.csv')
#lowerCombined <-read.csv('stateLowerLegis.csv')
#upperCombined <-read.csv('stateUpperLegis.csv')
cor(stateCombined$krige.state,stateCombined$obama)
cor(stateCombined$krige.state,stateCombined$dwnom1)
cor(congCombined$krige.cong,congCombined$dwnom1)
cor(upperCombined$krige.upper,upperCombined$np_score,use="pairwise")
cor(lowerCombined$krige.lower,lowerCombined$np_score,use="pairwise")

